
* ---------------------------------------------------
* Event History Analysis
* Josef Brüderl, March 2011
* Entry into Motherhood (data from ALLBUS 2000)
* ---------------------------------------------------

* duration: age at birth of first child - 14 (measured in years only)
* child: =1 if child, =0 if censored at time of interview
* educ: years of education
* east: =1 if born in East Germany, =0 if born in West Germany
* coh: birth cohort dummies

* Run everything below under Stata 11 syntax rules
version 11

* Working directory. The path is machine-specific; Motherhood.dta is read
* from this folder by the -use- command below.
cd "K:\Vorlesung EHA\Stata Beispiele\"

* =============================================
*          Continuous Time EHA
* =============================================

* Read the data set, discarding whatever is currently in memory
use Motherhood.dta, clear

* Declare survival-time data: analysis time is -duration-,
* an event (failure) is recorded when child==1
stset duration, failure(child==1)

* Inspect the result of stset: show the st system variables (_t0, _t, _d)
* next to the original variables for the first 10 observations
* (nolabel prints numeric codes instead of value labels)
list persnr duration child _t0 _t _d educ east cohort in 1/10, nolabel
stdescribe
stvary



* =======================================
*  Life table
* =======================================
* Survival and hazard tables for -duration- (event indicator: child),
* with time grouped by the cutpoints 0, 6, 11, 16, 21, 26 and 36
ltable duration child, intervals(0 6 11 16 21 26 36) survival hazard



* =======================================
*  Kaplan-Meier
* =======================================
* Table of the Kaplan-Meier survivor function
sts list

* Survivor function for the full sample, with confidence band,
* drawn up to analysis time 30
sts graph, survival tmax(30) ci ///
    xtitle("age - 14", size(large) margin(0 0 0 2)) ///
    ytitle("proportion childless", size(large)) ///
    xlabel(0(5)30, labsize(medium)) ///
    ylabel(0(0.1)1, angle(0) labsize(medium) format(%3.1f))

* Kernel-smoothed hazard function (bandwidth 2), with confidence band
sts graph, hazard tmax(30) ci width(2) ///
    xtitle("age - 14", size(large) margin(0 0 0 2)) ///
    ytitle("fertility rate", size(large)) ///
    xlabel(0(5)30, labsize(medium)) ///
    ylabel(0(0.02)0.12, angle(0) grid labsize(medium) format(%4.2f))

* Survivor functions separately for West (east=0) and East (east=1),
* followed by a test of equality of the two survivor functions
sts graph, survival by(east) ci tmax(30) xsize(4.5) ///
    xtitle("age - 14", size(large) margin(0 0 0 2)) ///
    ytitle("proportion childless", size(large)) ///
    xlabel(0(5)30, labsize(medium)) ///
    ylabel(0(0.1)1, angle(0) labsize(medium) format(%3.1f)) ///
    legend(position(1) ring(0) rows(2) order(1 2 3 4) ///
        label(2 "West") label(4 "East") size(medlarge))
sts test east

* What happens if censored observations are ignored?
* First graph: cohorts with cohort>=4, all cases (censored ones included)
sts graph if cohort >= 4, survival by(cohort) ci tmax(30) xsize(5) ///
    xtitle("age - 14", size(large) margin(0 0 0 2)) ///
    ytitle("proportion childless", size(large)) ///
    title("Survival Curves by Birth Cohort") ///
    subtitle("including censored cases") ///
    xlabel(0(5)30, labsize(medium)) ///
    ylabel(0(0.1)1, angle(0) labsize(medium) format(%3.1f)) ///
    legend(position(1) ring(0) rows(2) order(1 2 3 4) ///
        label(2 "1951-65") label(4 "1966-81") size(medlarge))

* Second graph: same cohorts, but only cases with an observed event (child==1)
sts graph if cohort >= 4 & child == 1, survival by(cohort) ci tmax(30) xsize(5) ///
    xtitle("age - 14", size(large) margin(0 0 0 2)) ///
    ytitle("proportion childless", size(large)) ///
    title("Survival Curves by Birth Cohort") ///
    subtitle("excluding censored cases") ///
    xlabel(0(5)30, labsize(medium)) ///
    ylabel(0(0.1)1, angle(0) labsize(medium) format(%3.1f)) ///
    legend(position(1) ring(0) rows(2) order(1 2 3 4) ///
        label(2 "1951-65") label(4 "1966-81") size(medlarge))

* Treating censored observations as events
* (disabled on purpose: the stset below has no failure() option, so it
*  replaces the st declaration made at the top of the file; if this part
*  is enabled, repeat the original stset before running later sections)
/* stset duration
sts graph if cohort>=4, survival by(cohort) ci tmax(30) xsize(5) ///
       xtitle("age - 14", size(large) margin(0 0 0 2))              ///
       ytitle("proportion childless", size(large))                  ///
       title("Survival Curves by Birth Cohort")                     ///
       subtitle("treating censored cases as events")                ///
       xlabel(0(5)30, labsize(medium))                              ///
       ylabel(0(0.1)1, angle(0) labsize(medium) format(%3.1f))      ///
       legend(pos(1) ring(0) row(2) order(1 2 3 4) lab(2 "1951-65") ///
        lab(4 "1966-81") size(medlarge))  */

		
		
* =======================================
*  Parametric PH models
* =======================================
* Exponential model; the second call redisplays the same fit
* as coefficients instead of hazard ratios
streg educ east coh2 coh3 coh4 coh5, distribution(exponential)
streg, nohr

* Conditional effect plot of the rate (hazard) function: West vs. East
stcurve, hazard at1(east=0) at2(east=1) range(0 30) ///
    xlabel(0(5)30) ///
    ylabel(0(0.02)0.12)

* Conditional effect plot of the survivor function: West vs. East
stcurve, survival at1(east=0) at2(east=1) range(0 30) ///
    xtitle("age - 14", size(large) margin(0 0 0 2)) ///
    ytitle("proportion childless", size(large)) ///
    xlabel(0(5)30, labsize(medium)) ///
    ylabel(0(0.1)1, angle(0) grid labsize(medium) format(%3.1f)) ///
    legend(position(1) ring(0) rows(2) order(1 2) ///
        label(1 "West") label(2 "East") size(medlarge)) ///
    xsize(4.5)
       

	   
****************************************
* Cox-Model
****************************************
* Fit the Cox model; the second call redisplays the same fit
* as coefficients instead of hazard ratios
stcox educ east coh2 coh3 coh4 coh5
stcox, nohr


* Conditional Effect Plots (rate function)
* Testing the PH assumption by comparing with non-parametric rate estimates

* Non-parametric (kernel-smoothed, bandwidth 2 per group) hazard by east
sts graph, hazard by(east) tmax(30) width(2 2) xsize(4.5)                ///
       plot1opts(lwidth(thick)) plot2opts(lwidth(thick))                 ///
       title("Non-parametric hazard estimate")                           ///
       xtitle("age - 14", size(large) margin(0 0 0 2))                   ///
       ytitle("fertility rate", size(large))                             ///
       xlabel(0(5)30, labsize(medium))                                   ///
       ylabel(0(0.025)0.15, angle(0) grid labsize(medium) format(%5.3f)) ///
       legend(pos(1) ring(0) row(2) order(1 2) lab(1 "West")             ///
        lab(2 "East") size(medlarge))

* Hazard implied by the Cox model, for comparison with the graph above.
* basehc() creates the new variable hr1; drop a copy left over from an
* earlier run first, so this section can be repeated without an
* "already defined" error (no effect on a fresh run).
capture drop hr1
stcox educ east coh2 coh3 coh4 coh5, basehc(hr1)
stcurve, hazard at1(east=0) at2(east=1) range(0 30) width(2) xsize(4.5)  ///
       lwidth(thick thick)                                               ///
       title("Cox model hazard estimate")                                ///
       xtitle("age - 14", size(large) margin(0 0 0 2))                   ///
       ytitle("fertility rate", size(large))                             ///
       xlabel(0(5)30, labsize(medium))                                   ///
       ylabel(0(0.025)0.15, angle(0) grid labsize(medium) format(%5.3f)) ///
       legend(pos(1) ring(0) row(2) order(1 2) lab(1 "West")             ///
        lab(2 "East") size(medlarge))


* Testing the PH assumption by further graphical tests
stphplot, by(east)
stcoxkm, by(east)


* Testing the PH assumption by tvc (interaction with analysis time)
* Is the interaction with analysis time significant?
stcox educ east coh2 coh3 coh4 coh5, tvc(east) texp(_t)


* Testing the PH assumption by Schoenfeld residuals
* The stubs sch* / sca* create one new variable per covariate
* (sch1-sch6, sca1-sca6). Drop copies left over from an earlier run so the
* command can be repeated; the explicit lists avoid touching any other
* variable whose name merely starts with "sch" or "sca".
capture drop sch1 sch2 sch3 sch4 sch5 sch6
capture drop sca1 sca2 sca3 sca4 sca5 sca6
stcox educ east coh2 coh3 coh4 coh5, schoenfeld(sch*) scaledsch(sca*)
estat phtest, detail            //formal test via Schoenfeld residuals



****************************************
* Parametric AFT Models
****************************************
streg educ east coh2 coh3 coh4 coh5, dist(loglogistic)
* Note: AFT models in Stata are not in the "rate metric", but in "time metric"
* Therefore, coefficients have the opposite sign.
* If you want the rate metric multiply coeff by -1 and divide by gamma


* Conditional Effect Plots (rate function)
stcurve, hazard at1(east=0) at2(east=1) range(0 30) xsize(4.5)           ///
       lwidth(thick thick)                                               ///
       title("Log-logistic model hazard estimate")                       ///
       xtitle("age - 14", size(large) margin(0 0 0 2))                   ///
       ytitle("fertility rate", size(large))                             ///
       xlabel(0(5)30, labsize(medium))                                   ///
       ylabel(0(0.025)0.15, angle(0) grid labsize(medium) format(%5.3f)) ///
       legend(pos(1) ring(0) row(2) order(1 2) lab(1 "West")             ///
        lab(2 "East") size(medlarge))


* Improving interpretation *****************

* a) Time ratios: multiplicative effects on duration
streg, tr                                    //redisplays estimates, time ratios

* b) Marginal effects on median duration
mfx compute, predict(median time) nose

* c) Predicting median duration (from this one could compute exact unit effects)
* predict creates the new variable meddur; drop a copy left over from an
* earlier run first, so this step can be repeated without an
* "already defined" error (no effect on a fresh run).
capture drop meddur
predict meddur, median time
list educ coh2 coh3 coh4 coh5 east meddur in 1/15



* =======================================
*  Comparing models
* =======================================
* Exponential regression (coefficients, not hazard ratios)
streg educ east coh2 coh3 coh4 coh5, distribution(exponential) nohr
estimates store exponen

* Log-logistic regression; its estimates are additionally rescaled by
* -1/gamma and kept in matrix b (transformation to the rate metric)
streg educ east coh2 coh3 coh4 coh5, distribution(loglogistic)
matrix b = -(1/e(gamma)) * e(b)
estimates store loglog

* Cox regression (coefficients, not hazard ratios)
stcox educ east coh2 coh3 coh4 coh5, nohr
estimates store cox

* Side-by-side table of the three stored fits, coefficients and t statistics
* NOTE(review): stats() normally takes e() scalars such as N, ll, aic;
* confirm that stats(b t) is intended here in addition to b() and t()
estimates table exponen cox loglog, ///
    stats(b t) b(%9.2f) t(%9.2f) ///
    equations(1) ///
    keep(educ east coh2 coh3 coh4 coh5)

* Log-logistic coefficients in rate metric
matrix list b

* Information criteria for the two parametric models
estimates stats exponen loglog
